Parkinson’s Disease -Omics: scRNA-seq of Monocytes

<<<<<<< HEAD ## Setup

## Automatically generates rmarkdown output file
knitr::opts_chunk$set(echo=T, error=T, cache=T, cache.lazy=F)

Load Libraries & Report Versions

library(Seurat)
library(dplyr)
library(gridExtra)  

sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS release 6.9 (Final)
## 
## Matrix products: default
## BLAS/LAPACK: /hpc/packages/minerva-common/intel/parallel_studio_xe_2018/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
## 
## locale:
## [1] C
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] gridExtra_2.3 dplyr_0.7.6   Seurat_2.3.4  Matrix_1.2-14 cowplot_0.9.3
## [6] ggplot2_3.0.0
## 
## loaded via a namespace (and not attached):
##   [1] tsne_0.1-3          segmented_0.5-3.0   nlme_3.1-137       
##   [4] bitops_1.0-6        bit64_0.9-7         RColorBrewer_1.1-2 
##   [7] httr_1.3.1          rprojroot_1.3-2     prabclus_2.2-6     
##  [10] tools_3.5.1         backports_1.1.2     irlba_2.3.2        
##  [13] R6_2.3.0            rpart_4.1-13        KernSmooth_2.23-15 
##  [16] Hmisc_4.1-1         lazyeval_0.2.1      colorspace_1.3-2   
##  [19] trimcluster_0.1-2.1 nnet_7.3-12         withr_2.1.2        
##  [22] tidyselect_0.2.4    bit_1.1-14          compiler_3.5.1     
##  [25] htmlTable_1.12      hdf5r_1.0.0         diptest_0.75-7     
##  [28] caTools_1.17.1      scales_0.5.0        checkmate_1.8.5    
##  [31] lmtest_0.9-36       DEoptimR_1.0-8      mvtnorm_1.0-8      
##  [34] robustbase_0.93-1.1 ggridges_0.5.0      pbapply_1.3-4      
##  [37] dtw_1.20-1          proxy_0.4-22        stringr_1.3.1      
##  [40] digest_0.6.17       mixtools_1.1.0      foreign_0.8-70     
##  [43] rmarkdown_1.10      R.utils_2.7.0       base64enc_0.1-3    
##  [46] pkgconfig_2.0.2     htmltools_0.3.6     htmlwidgets_1.2    
##  [49] rlang_0.2.1         rstudioapi_0.7      bindr_0.1.1        
##  [52] jsonlite_1.5        zoo_1.8-3           ica_1.0-2          
##  [55] mclust_5.4.1        gtools_3.8.1        acepack_1.4.1      
##  [58] R.oo_1.22.0         magrittr_1.5        modeltools_0.2-22  
##  [61] Formula_1.2-3       lars_1.2            Rcpp_1.0.0         
##  [64] munsell_0.5.0       reticulate_1.7      ape_5.1            
##  [67] R.methodsS3_1.7.1   stringi_1.2.4       yaml_2.1.19        
##  [70] MASS_7.3-50         flexmix_2.3-14      gplots_3.0.1       
##  [73] Rtsne_0.13          plyr_1.8.4          grid_3.5.1         
##  [76] parallel_3.5.1      gdata_2.18.0        crayon_1.3.4       
##  [79] doSNOW_1.0.16       lattice_0.20-35     splines_3.5.1      
##  [82] SDMTools_1.1-221    knitr_1.20          pillar_1.3.0       
##  [85] igraph_1.2.1        fpc_2.1-11          reshape2_1.4.3     
##  [88] codetools_0.2-15    stats4_3.5.1        glue_1.3.0         
##  [91] evaluate_0.11       metap_0.9           latticeExtra_0.6-28
##  [94] data.table_1.11.8   png_0.1-7           foreach_1.4.4      
##  [97] tidyr_0.8.1         gtable_0.2.0        RANN_2.6           
## [100] purrr_0.2.5         kernlab_0.9-26      assertthat_0.2.0   
## [103] class_7.3-14        survival_2.42-6     tibble_1.4.2       
## [106] snow_0.4-3          iterators_1.0.10    bindrcpp_0.2.2     
## [109] cluster_2.0.7-1     fitdistrplus_1.0-9  ROCR_1.0-7
print(paste("Seurat ", packageVersion("Seurat")))
## [1] "Seurat  2.3.4"

Load Data

#setwd("~/Desktop/PD_scRNAseq/")
dir.create(file.path("Data"), showWarnings = FALSE) 
load("Data/seurat_object_add_HTO_ids.Rdata")
pbmc <- seurat.obj  
str(pbmc@raw.data)
## Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
##   ..@ i       : int [1:19051135] 3 6 8 9 10 12 13 14 16 18 ...
##   ..@ p       : int [1:27864] 0 10681 11185 13802 16136 18413 20563 22849 25028 27160 ...
##   ..@ Dim     : int [1:2] 24914 27863
##   ..@ Dimnames:List of 2
##   .. ..$ : chr [1:24914] "A1BG" "A1BG-AS1" "A2M" "A2M-AS1" ...
##   .. ..$ : chr [1:27863] "AAGCAGTGGTATCAAC" "TCTGTCTTCCAGTTTT" "GTCCTCATCCGCATCT" "CGGAGTCAGTGTTAGA" ...
##   ..@ x       : num [1:19051135] 1 1 1 1 1 1 1 1 1 1 ...
##   ..@ factors : list()

Clean Metadata

library(readxl)

## Update dx and mut fields
### Import updated  dx, mut info
subjectInfo <- read_excel("Data/scRNAseq_meta.xlsx") 
### Import metadata
meta <- read.table("Data/meta.data3.tsv", header=T )
### Remove last 2 cols (contain incorrect values) and duplicate HTO col
sum(meta$HTO != meta$HTO.1) # double check they're identical

meta["barcode"] = row.names(meta)
unique(meta["HTO"])
# Map incorrect subject IDs to CORRECT subject IDs (typos occurred at NYGC during processing)
map = setNames( # Incorrect (from NYGC)
               c("NYUMD0011", "BIMD0076", "MSMD0067", "BIMD0077", "BIMD0007",
                 "BIMD0075", "NYUMD0015", "MSMD0035","MSMD0207", "BIMD0010"), 
               # Correct (from Evan)
              c("NYUMD0011", "BID0076", "MSMD0067", "BID0077", "BIMD0007",
                 "BID00075","NYUMD0015", "MSMD0035", "MSMD0207", "BIMD0010")
              )
meta["ID"] <- map[unlist(meta["HTO"])]
meta <- subset(meta, select = -c(dx, mut, HTO, HTO.1)) 
### Merge new cols
metadata <- merge(meta, subjectInfo, by="ID", all.x=T) 
row.names(metadata) <- metadata$barcode
colnames(metadata)[colnames(metadata)=="Ethnitcity"] <- "Ethnicity" 

dim(meta)
dim(metadata)
head(metadata)
# Correct misspelling
### Export updated Metadata
write.table(metadata, "Data/meta.data4.tsv") 

# Add metadata to seurat object
pbmc <- AddMetaData(object = pbmc, metadata =  metadata ) 
pbmc
## % Mitochondrial genes metadata
#mito.genes <- grep(pattern = "^MT-", x = rownames(x = pbmc@data), value = TRUE)
#percent.mito <- Matrix::colSums(pbmc@raw.data[mito.genes, ])/Matrix::colSums(pbmc@raw.data)
#pbmc <- AddMetaData(object = pbmc, metadata = percent.mito, col.name = "percent.mito")

Subset for Testing

metadata <- read.table("Data/meta.data4.tsv")  
head(metadata)
##                        ID nGene nUMI orig.ident singlet.or.not.binary
## AAAGCAAGTTTGTTGG BIMD0007   784 1780  RAJ_13357                     1
## TCAGCAATCTTGACGA BIMD0007   742 1854  RAJ_13357                     1
## AGCTCCTTCGCGTAGC BIMD0007   495  988  RAJ_13357                     1
## TATTACCCACTCTGTC BIMD0007   812 1874  RAJ_13357                     1
## CTCGAGGAGCGATTCT BIMD0007   863 2260  RAJ_13357                     1
## ATAAGAGCATCAGTCA BIMD0007   803 2034  RAJ_13357                     1
##                  percent.mito res.2 res.1 res.0.6 res.0.3 CellType
## AAAGCAAGTTTGTTGG   0.03146067     3     3       1       0        0
## TCAGCAATCTTGACGA   0.03022126    18     1       0       0        0
## AGCTCCTTCGCGTAGC   0.04048583    14     7       2       3        3
## TATTACCCACTCTGTC   0.04695838    16    12       7       6        6
## CTCGAGGAGCGATTCT   0.02125775     1     0       1       0        0
## ATAAGAGCATCAGTCA   0.02163225     9     8       0       0        0
##                           barcode dx mut Ethnicity Gender Age
## AAAGCAAGTTTGTTGG AAAGCAAGTTTGTTGG PD  PD     White      M  59
## TCAGCAATCTTGACGA TCAGCAATCTTGACGA PD  PD     White      M  59
## AGCTCCTTCGCGTAGC AGCTCCTTCGCGTAGC PD  PD     White      M  59
## TATTACCCACTCTGTC TATTACCCACTCTGTC PD  PD     White      M  59
## CTCGAGGAGCGATTCT CTCGAGGAGCGATTCT PD  PD     White      M  59
## ATAAGAGCATCAGTCA ATAAGAGCATCAGTCA PD  PD     White      M  59
# Make AgeGroups
makeAgeGroups <- function(){
  dim(metadata)
  getMaxRound <- function(vals=metadata$Age, unit=10)unit*ceiling((max(vals)/unit))
  getMinRound <- function(vals=metadata$Age, unit=10)unit*floor((min(vals)/unit)) 
   
  ageBreaks = c(seq(getMinRound(), getMaxRound(), by = 10), getMaxRound()+10)
  AgeGroupsUniq <- c()
  for (i in 1:(length(ageBreaks)-1)){ 
    AgeGroupsUniq <- append(AgeGroupsUniq, paste(ageBreaks[i],ageBreaks[i+1], sep="-")) 
  } 
  data.table::setDT(metadata,keep.rownames = T,check.names = F)[, AgeGroups := cut(Age, 
                                  breaks = ageBreaks, 
                                  right = F, 
                                  labels = AgeGroupsUniq,
                                  nclude.lowest=T)]
  metadata <- data.frame(metadata)
  unique(metadata$AgeGroups)
  head(metadata)
  dim(metadata)
  return(metadata)
}
# metadata <- makeAgeGroups()

pbmc <- AddMetaData(object = pbmc, metadata = metadata)  
# Get rid of any NAs (cells that don't match up with the metadata)
pbmc <- FilterCells(object = pbmc,  subset.names = "nGene", low.thresholds = 0,
                    # Subset for testing 
                    #cells.use = pbmc@cell.names[0:2000] 
                    )
head(pbmc@meta.data) 
##                  nGene nUMI orig.ident singlet.or.not.binary       HTO
## GTCATTTTCCGCATCT  2035 6729  RAJ_13357                     1 NYUMD0011
## ATAAGAGAGGAGTTGC  1914 6718  RAJ_13357                     1   BID0076
## CATTATCCATGGTCTA  1971 6156  RAJ_13357                     1  MSMD0067
## CTTAGGAGTGGCGAAT  1893 6392  RAJ_13357                     1 NYUMD0011
## CATCAGAAGCACCGCT  1967 6110  RAJ_13357                     1  MSMD0067
## GCGGGTTAGTAGCCGA  1868 5977  RAJ_13357                     1 NYUMD0011
##                         ID percent.mito res.2 res.1 res.0.6 res.0.3
## GTCATTTTCCGCATCT  MSMD0207   0.04191439     0     1       0       0
## ATAAGAGAGGAGTTGC  BIMD0076   0.04362066     0     1       0       0
## CATTATCCATGGTCTA NYUMD0015   0.03086921     0     1       0       0
## CTTAGGAGTGGCGAAT  MSMD0207   0.03613892     0     1       0       0
## CATCAGAAGCACCGCT NYUMD0015   0.04437531    19    10       8       0
## GCGGGTTAGTAGCCGA  MSMD0207   0.03514056     6     2       0       0
##                  CellType          barcode      dx     mut Ethnicity
## GTCATTTTCCGCATCT        0 GTCATTTTCCGCATCT      PD      PD     White
## ATAAGAGAGGAGTTGC        0 ATAAGAGAGGAGTTGC      PD     GBA     White
## CATTATCCATGGTCTA        0 CATTATCCATGGTCTA control control     White
## CTTAGGAGTGGCGAAT        0 CTTAGGAGTGGCGAAT      PD      PD     White
## CATCAGAAGCACCGCT        0 CATCAGAAGCACCGCT control control     White
## GCGGGTTAGTAGCCGA        0 GCGGGTTAGTAGCCGA      PD      PD     White
##                  Gender Age
## GTCATTTTCCGCATCT      M  71
## ATAAGAGAGGAGTTGC      M  47
## CATTATCCATGGTCTA      M  51
## CTTAGGAGTGGCGAAT      M  71
## CATCAGAAGCACCGCT      M  51
## GCGGGTTAGTAGCCGA      M  71

Diagnostic Plots

Violin Plots

VlnPlot(object = pbmc, features.plot = c("nGene", "nUMI", "percent.mito"), nCol = 3)

Gene Plots

par(mfrow = c(1, 2))
GenePlot(object = pbmc, gene1 = "nUMI", gene2 = "percent.mito")
GenePlot(object = pbmc, gene1 = "nUMI", gene2 = "nGene")

Filter & Normalize Data

pbmc <- FilterCells(object = pbmc, subset.names = c("nGene", "percent.mito"), 
    low.thresholds = c(200, -Inf), high.thresholds = c(2500, 0.05))

pbmc <- NormalizeData(object = pbmc, normalization.method = "LogNormalize", 
    scale.factor = 10000)

# Store the top most variable genes in @var.genes
pbmc <- FindVariableGenes(object = pbmc, mean.function = ExpMean, dispersion.function = LogVMR,
    x.low.cutoff = 0.0125, x.high.cutoff = 3, y.cutoff = 0.5)

pbmc <- ScaleData(object = pbmc, vars.to.regress = c("nUMI", "percent.mito"))
## Regressing out: nUMI, percent.mito
## 
## Time Elapsed:  2.00796855290731 mins
## Scaling data matrix

Dimensionality Reduction & Clustering

PCA

ProjectPCA scores each gene in the dataset (including genes not included in the PCA) based on their correlation with the calculated components. Though we don’t use this further here, it can be used to identify markers that are strongly correlated with cellular heterogeneity, but may not have passed through variable gene selection. The results of the projected PCA can be explored by setting use.full=T in the functions above

# Run PCA with only the top most variables genes
pbmc <- RunPCA(object = pbmc, pc.genes = pbmc@var.genes, do.print = TRUE, pcs.print = 1:5, 
    genes.print = 5)
## [1] "PC1"
## [1] "FCGR3A"   "RHOC"     "HLA-DPA1" "CDKN1C"   "IFITM1"  
## [1] ""
## [1] "S100A12"  "RNU1-60P" "RNVU1-13" "JUN"      "RNU2-59P"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "RNVU1-13" "RNU1-60P" "RNVU1-19" "IFITM3"   "DUSP1"   
## [1] ""
## [1] "S100A12"      "hsa-mir-6723" "PLBD1"        "FCER1A"      
## [5] "ALOX5AP"     
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "FCGR3A"        "CDKN1C"        "IFITM2"        "IFITM1"       
## [5] "RP11-290F20.3"
## [1] ""
## [1] "HLA-DRB1" "HLA-DRB6" "HLA-DPB1" "HLA-DRB5" "CLEC10A" 
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "CYBB"     "FGD2"     "C1orf56"  "S100A12"  "CDC42SE1"
## [1] ""
## [1] "GNLY"       "NKG7"       "CCL5"       "CTSW"       "PCED1B-AS1"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "HLA-DRB6" "HLA-DRB1" "HLA-DRB5" "HLA-DPA1" "HLA-DPB1"
## [1] ""
## [1] "C1orf56"  "CDC42SE1" "HNRNPH1"  "CTNNB1"   "FGD2"    
## [1] ""
## [1] ""
# PCA plots
VizPCA(object = pbmc, pcs.use = 1:2)

PCAPlot(object = pbmc, dim.1 = 1, dim.2 = 2)

pbmc <- ProjectPCA(object = pbmc, do.print = FALSE)

## PCA Heatmap: PC1
PCHeatmap(object = pbmc, pc.use = 1, cells.use = 500, do.balanced = TRUE, label.columns = FALSE)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

## PCA Heatmap: PC1-PCn
PCHeatmap(object = pbmc, pc.use = 1:12, cells.use = 500, do.balanced = TRUE, 
    label.columns = FALSE, use.full = FALSE)

Determine statistically significant PCs

NOTE: This process can take a long time for big datasets, comment out for expediency. More approximate techniques such as those implemented in PCElbowPlot() can be used to reduce computation time

#pbmc <- JackStraw(object = pbmc, num.replicate = 100, display.progress = FALSE)
PCElbowPlot(object = pbmc)

Cluster cells

We first construct a KNN graph based on the euclidean distance in PCA space, and refine the edge weights between any two cells based on the shared overlap in their local neighborhoods (Jaccard similarity). To cluster the cells, we apply modularity optimization techniques such as the Louvain algorithm (default) or SLM [SLM, Blondel et al., Journal of Statistical Mechanics], to iteratively group cells together, with the goal of optimizing the standard modularity function.

pbmc <- FindClusters(object = pbmc, reduction.type = "pca", dims.use = 1:10, 
    resolution = 0.6, print.output = 0, save.SNN = F) 
PrintFindClustersParams(object = pbmc) 
## Parameters used in latest FindClusters calculation run on: 2018-12-27 14:45:42
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
## Error in if (n <= 0) {: argument is of length zero

t-SNE

As input to the tSNE, we suggest using the same PCs as input to the clustering analysis, although computing the tSNE based on scaled gene expression is also supported using the genes.use argument.

labSize <- 6
#pbmc <- StashIdent(object = pbmc, save.name = "pre_clustering") 
#pbmc <- SetAllIdent(object = pbmc, id = "pre_clustering") 


pbmc <- RunTSNE(object = pbmc, dims.use = 1:10, do.fast = TRUE)
# note that you can set do.label=T to help label individual clusters
TSNEPlot(object = pbmc, do.label=T, label.size = labSize) 

t-SNE + Metadata

metaVars <- c("CellType","dx","mut","Gender","Age")

for (var in metaVars){
  print(paste("t-SNE Metadata plot for ",var))
  # Metadata plot 
  p1 <- TSNEPlot(pbmc, do.return = T, pt.size = 0.5, group.by = var, do.label = T, 
                 dark.theme=F, plot.title=paste("Color by ",var))
  # t-SNE clusters plot
  p2 <- TSNEPlot(pbmc, do.label = T, do.return = T, pt.size = 0.5, plot.title=paste("Color by t-SNE clusters"))
  print(plot_grid(p1, p2))
}   
## [1] "t-SNE Metadata plot for  CellType"

## [1] "t-SNE Metadata plot for  dx"

## [1] "t-SNE Metadata plot for  mut"

## [1] "t-SNE Metadata plot for  Gender"

## [1] "t-SNE Metadata plot for  Age"

Finding differentially expressed genes (cluster biomarkers)

Biomarkers: All Clusters vs. All Other Clusters ***

### Biomarkers: One Cluster vs. Specific Clusters
# cluster5.markers <- FindMarkers(object = pbmc, ident.1 = 0, ident.2 = c(2), 
#     min.pct = 0.25)
# print(x = head(x = cluster5.markers, n = 3)) 

### Biomarkers: One Cluster vs. All Other Clusters 
# find all markers of a given cluster
# MUST run FindClusters() first
# cluster0.markers <- FindMarkers(object = pbmc, ident.1 = 0, min.pct = 0.25)
# print(x = head(x = cluster0.markers, n = 3))   


# find markers for every cluster compared to all remaining cells, report
# only the positive ones
pbmc.markers <- FindAllMarkers(object = pbmc, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)
topBiomarkers <- pbmc.markers %>% group_by(cluster) %>% top_n(2, avg_logFC)

Cluster Biomarker Tests

NA

getTopBiomarker <- function(pbmc.markers, clusterID, topN=1){
  df <- subset(pbmc.markers, p_val_adj<0.05 & cluster==as.character(clusterID)) %>% arrange(desc(avg_logFC))
  top_pct_markers <- df[1:topN,"gene"]
  return(top_pct_markers)
}
clust1_biomarkers <- getTopBiomarker(pbmc.markers, clusterID=1, topN=2)
clust2_biomarkers <- getTopBiomarker(pbmc.markers, clusterID=2, topN=2)




# Top Biomarkers for 
plotBiomarkers <- function(pbmc, biomarkers, cluster){
  biomarkerPlots <- list()
  for (marker in biomarkers){
    print(marker)
    p <- VlnPlot(object = pbmc, features.plot = c(marker), y.log=T, return.plotlist=T)
    biomarkerPlots[[marker]] <- p + ggplot2::aes(alpha=.7) 
  }
  combinedPlot <- do.call(grid.arrange, c(biomarkerPlots, 
                                          list(ncol=2, top=paste("Top DEG Biomarkers for Cluster",cluster))) ) 
  return(combinedPlot) 
}

# Plot top 2 biomarker genes for each 
for (clust in unique(pbmc.markers$cluster)){ 
  biomarkers <- getTopBiomarker(pbmc.markers, clusterID=clust, topN=2)
  plotBiomarkers(pbmc, biomarkers, clust)
} 
## [1] "S100A12"
## [1] "S100A8"
## [1] "RP11-1143G9.4"
## [1] NA
## Error in if (all(unlist(x = strsplit(x = my.var, split = "[0-9]+")) == : missing value where TRUE/FALSE needed

Top Biomarkers: tSNE

FeaturePlot(object = pbmc, features.plot = topBiomarkers$gene, cols.use = c("grey", "blue"), 
    reduction.use = "tsne")

Top Biomarkers: Heatmap

top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(10, avg_logFC)
# setting slim.col.label to TRUE will print just the cluster IDS instead of
# every cell name
DoHeatmap(object = pbmc, genes.use = top10$gene, slim.col.label = TRUE, remove.key = TRUE)

Assigning cell type identity to clusters

Label Clusters by Biomarker

current.cluster.ids <- unique(pbmc.markers$cluster) #c(0, 1, 2, 3, 4, 5, 6, 7)
top1 <- pbmc.markers %>% group_by(cluster) %>% top_n(1, avg_logFC)
new.cluster.ids <- top1$gene #c("CD4 T cells", "CD14+ Monocytes", "B cells", "CD8 T cells", "FCGR3A+ Monocytes", "NK cells", "Dendritic cells", "Megakaryocytes")

pbmc@ident <- plyr::mapvalues(x = pbmc@ident, from = current.cluster.ids, to = new.cluster.ids)
TSNEPlot(object = pbmc, do.label = TRUE, pt.size = 0.5) 

Further subdivisions within cell types

If you perturb some of our parameter choices above (for example, setting resolution=0.8 or changing the number of PCs), you might see the CD4 T cells subdivide into two groups. You can explore this subdivision to find markers separating the two T cell subsets. However, before reclustering (which will overwrite object@ident), we can stash our renamed identities to be easily recovered later.

# First lets stash our identities for later
#pbmc <- StashIdent(object = pbmc, save.name = "ClusterNames_0.6")

# Note that if you set save.snn=T above, you don't need to recalculate the
# SNN, and can simply put: pbmc <- FindClusters(pbmc,resolution = 0.8)
pbmc <- FindClusters(object = pbmc, reduction.type = "pca", dims.use = 1:10, 
    resolution = 0.8, print.output = FALSE)

## Warning in BuildSNN(object = object, genes.use = genes.use, reduction.type
## = reduction.type, : Build parameters exactly match those of already
## computed and stored SNN. To force recalculation, set force.recalc to TRUE.

# Demonstration of how to plot two tSNE plots side by side, and how to color
# points based on different criteria
plot1 <- TSNEPlot(object = pbmc, do.return = TRUE, no.legend = TRUE, do.label = TRUE, label.size=labSize)
plot2 <- TSNEPlot(object = pbmc, do.return = TRUE, group.by = "ClusterNames_0.6", 
                  no.legend = TRUE, do.label = TRUE, label.size=labSize)
## Error in FetchData(object = object, vars.all = group.by): Error: ClusterNames_0.6 not found
plot_grid(plot1, plot2)
## Error in plot_grid(plot1, plot2): object 'plot2' not found
# Find discriminating markers
tcell.markers <- FindMarkers(object = pbmc, ident.1 = 0, ident.2 = 1)

# Most of the markers tend to be expressed in C1 (i.e. S100A4). However, we
# can see that CCR7 is upregulated in C0, strongly indicating that we can
# differentiate memory from naive CD4 cells.  cols.use demarcates the color
# palette from low to high expression
FeaturePlot(object = pbmc, features.plot = top1$gene, cols.use = c("green", "blue"))

# pbmc <- SetAllIdent(object = pbmc, id = "ClusterNames_0.6") 

Save Results

saveRDS(pbmc, file = "Data/cd14-processed.rds")